import requests
from bs4 import BeautifulSoup
from lxml import etree
import re
from selenium import webdriver
import time

# HTTP request headers sent with every `requests` call below so that the
# request resembles one coming from a desktop browser.
headersParameters = {
    "Connection": "Keep-Alive",
    "Accept": "text/html, application/xhtml+xml, */*",
    "Accept-Language": "en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3",
    "Accept-Encoding": "gzip, deflate",
    "User-Agent": (
        "Mozilla/6.1 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko"
    ),
}
# Search term submitted to Sogou mobile search.
keyword = "百瑞赢"

url = u'https://m.sogou.com/web/searchList.jsp?keyword='+keyword

# How many times to press the "next page" button, and how long to pause
# around each press so the page has time to render.
NEXT_PAGE_CLICKS = 4
PAGE_LOAD_WAIT_SECONDS = 2

browser = webdriver.Chrome()
try:
    browser.get(url)
    for _ in range(NEXT_PAGE_CLICKS):
        # Pause, then click "next page".
        time.sleep(PAGE_LOAD_WAIT_SECONDS)
        # find_elements(by, value) exists in both Selenium 3 and 4 (the
        # find_element_by_* helpers were removed in Selenium 4) and returns
        # an empty list rather than raising when the button is absent.
        next_buttons = browser.find_elements('id', 'ajax_next_page')
        if not next_buttons:
            # No further pages available; keep whatever has loaded so far.
            break
        next_buttons[0].click()
    # Give the last requested batch the same time to render before
    # snapshotting the page; otherwise the final click may be wasted.
    time.sleep(PAGE_LOAD_WAIT_SECONDS)
    # HTML of the page after paging; parsed further down in the script.
    data = browser.page_source
finally:
    # quit() ends the whole driver session (close() only closes the window),
    # and running it in `finally` releases the browser even on failure.
    browser.quit()



# Parse the captured results page; each search hit is read from an <h3>
# element that wraps the result link.
soup = BeautifulSoup(data, 'html.parser')
h3 = soup.find_all('h3')


for myh3 in h3:
    # Best-effort per result: a failure on one hit is reported and the loop
    # moves on to the next one.
    try:
        link = myh3.find('a')
        if link is None:
            # Heading without a link: nothing to follow.
            continue

        table = link.get_text()
        print(table)

        href = link.get('href')
        if not href:
            continue

        # Fetch Sogou's intermediate page for this hit. The timeout keeps a
        # stalled connection from hanging the whole script.
        sougou_content = requests.get(
            url='https://m.sogou.com/web' + href.replace("./", "/"),
            headers=headersParameters,
            allow_redirects=True,
            timeout=10,
        ).text

        # Extract the original (source) URL from the intermediate page.
        # NOTE(review): the capture group is greedy, so on a line with other
        # attributes after href it runs to the last double quote -- confirm
        # against a real response before tightening the pattern.
        searchObj = re.findall(r'href\=\"(.*)\"', sougou_content, re.I)
        source_url = ''
        # The second match is the one used, so at least two matches are
        # required; otherwise the URL is left empty.
        if len(searchObj) > 1:
            source_url = searchObj[1]
        print(source_url)

    except Exception as e:
        print(str(e))


            